## A Case of Renewable Portfolio Standards in Nevada
## Dawson Frost, Inhwan Ko, Nicolas Wittstock
## Correspondence: Inhwan Ko (inhwanko at unr dot edu)

## Load packages
#install.pacakges("pacman")
pacman::p_load(MASS, tmap, sf, tidyverse, readxl, qdapRegex, 
               ggpubr, geosphere, rgeos, spatialreg, spdep)

################### 1. NV Ballot Initiative 6 2020 map ########################

## Load NV map (shapefile)
NVmap <- read_sf("Precincts2020/Precincts2020.shp")

## NV election results in Nov 2020
ge2020 <- read_excel("GE2020.xlsx")[-1:-2,] 
colnames(ge2020) <- c("county","precinct","contest","selection","vote")

## Extract NV State Question 6 results only
question6 <- ge2020 %>% filter(contest=="STATE QUESTION NO. 6") 

## Calculate vote share for Question 6
question6 <- question6 %>% 
  select(county, precinct, selection, vote) %>% 
  spread(selection, vote)
question6$Yes <- as.numeric(question6$Yes); question6$No <- as.numeric(question6$No)
question6$total <- question6$Yes + question6$No
question6$support <- 100*question6$Yes / question6$total

## Find which precinct did not have an election
question6$support[question6$total==0] <- NA
question6$election <- ifelse(
  question6$total==0, "No", "Yes" 
)
sum(is.nan(question6$support)) # all NA in the support means no election

## Assign FIPS county code
county <- unique(question6$county)
countycode <- c("510","001","003","005","007","009",
                "011","013","015","017","019","021",
                "023","027","029","031","033")
NVcounty <- data.frame(county=county, countycode=countycode)
question6 <- left_join(question6, NVcounty, by="county")

## Assign precinct code
question6 <- question6 %>% arrange(countycode, precinct)
NVmap <- NVmap %>% arrange(COUNTYFP20, NAME20)

## Manually assign codes by comparing ballot6 with NVmap dataset
## This is not ideal, but the precinct names are not systematically coded so manual work is unavoidable
## Need to use GEOID20 in future data generated by the state

# write.csv(question6, "question6.csv")
question6 <- read.csv("question6_refined.csv")[,c(-1,-2,-7,-10)]
question6$GEOID20 <- as.character(question6$GEOID20) ## unique precinct identifier

## Merge NVmap and Question 6 datasets
NVmap_merged <- left_join(NVmap, question6, by="GEOID20")
NVmap_merged$match <- ifelse(
  is.na(NVmap_merged$support) & is.na(NVmap_merged$election),
  "No","Yes" # No means no elections in the precinct in 2020; Yes means there were
)

## There were some missing precincts in Washoe county 
## Resolve this by incorporting those missing precincts from Washoe County Data
washoe <- read_sf("WashoeCo_Precincts_2020/WashoeCo_Precincts_2020.shp")
washoe <- st_transform(washoe, 4269)
missing <- seq(7592, 7599, by=1)
washoe <- washoe %>% 
  filter(PRECINCT %in% missing) %>% 
  select(PRECINCT, geometry) %>% 
  arrange(PRECINCT)

washoe$STATEFP20 <- rep(32,8)
washoe$COUNTYFP20 <- rep(031,8)
washoe$VTDST20 <- c("007592","007593","007594","007595","007596","007597","007598","007599")
washoe$GEOID20 <- c("32031007592","32031007593","32031007594","32031007595",
                    "32031007596","32031007597","32031007598","32031007599")
washoe$NAME20 <- c("Precinct No. 7592","Precinct No. 7593","Precinct No. 7594","Precinct No. 7595",
                   "Precinct No. 7596","Precinct No. 7597","Precinct No. 7598","Precinct No. 7599")
washoe$NAMELSAD20 <- washoe$NAME20
washoe$ALAND20 <- NA # fill in later
washoe$AWATER20 <- NA # fill in later
st_centroid(washoe)$geometry
washoe$INTPTLAT20 <- c("+39.8455", "+39.68025", "+39.857", "+39.88743", 
                       "+39.93875", "+39.81723", "+39.76407", "+39.72209")
washoe$INTPTLON20 <- c("-119.695","-119.6283","-119.7357","-119.7039",
                       "-119.8187","-119.5542","-119.7494","-119.6947")

missing <- question6 %>% 
  filter(GEOID20 %in% washoe$GEOID20)
washoe <- left_join(washoe, missing, by="GEOID20")
washoe$match <- rep("Yes",8)
washoe$jurisdiction <- rep("Washoe",8)

NVmap_merged <- NVmap_merged %>% 
  select(-VTDI20, -LSAD20, -MTFCC20, -FUNCSTAT20)
washoe <- washoe %>% 
  select(colnames(NVmap_merged))

NVmap_merged <- rbind(NVmap_merged, washoe)
NVmap_merged <- NVmap_merged %>% 
  arrange(STATEFP20, COUNTYFP20, GEOID20)

sum(NVmap_merged$total, na.rm=T)
sum(question6$total, na.rm=T)

## The number of total votes match: Map is synchronized to the election data in 2020

## preliminary drawing
#sf_use_s2(FALSE)
tmap_mode("plot")
NVmap_merged %>% ## Nevada-wide
  tm_shape() +
  tm_borders(lwd=0.5, alpha=1) +
  tm_fill("support", n=10, style="pretty", id=c("GEOID20"),
          palette="RdBu") +
  tm_layout(legend.show=T)

tmap_mode("view")
NVmap_merged %>% ## Zoom in on to Reno and Vegas area
  tm_shape() +
  tm_borders(lwd=0.5, alpha=1) +
  tm_fill("support", n=10, style="pretty", id=c("GEOID20"),
          palette="RdBu") +
  tm_layout(legend.show=F) 


############################## 2. Variables ###################################

data <- NVmap_merged # reduce the object name for convenience

## 1) Population and demographics

demo <- read_excel("2020_Precinct_Demographic.xlsx") %>% 
  select(GEOID20, ADJPOP, TAWHITEALN, TAHISPANIC)
colnames(demo) <- c("GEOID20", "pop", "white", "hispanic")
demo$whiteperc <- 100*demo$white / demo$pop # white percentage
demo$hispanicperc <- 100*demo$hispanic / demo$pop # hispanic percentage

## White and Hispanic groups are relevant for the RE support
## See Stokes et al. (2023) as cited in the manuscript

data <- left_join(data, demo, by="GEOID20")
data$whiteperc[is.nan(data$whiteperc)] <- 0
data$hispanicperc[is.nan(data$hispanicperc)] <- 0

# population density, capita per sq mile 
# (divide ALAND20 by 2.59e6 because ALAND20 is sq meter)
data$popden <- round(data$pop / (data$ALAND20/2.59e6),3)


## 2) Partisanship (based on the 2020 Presidential Election results)
pres2020 <- ge2020 %>% filter(contest=="President and Vice President of the United States") %>% 
  select(county, precinct, selection, vote) %>% 
  spread(selection, vote)
colnames(pres2020)[3:7] <- c("biden", "blankenship", "jorgenson", "none", "trump") 
pres2020$biden <- as.numeric(pres2020$biden)
pres2020$blankenship <- as.numeric(pres2020$blankenship)
pres2020$jorgenson <- as.numeric(pres2020$jorgenson)
pres2020$none <- as.numeric(pres2020$none)
pres2020$trump <- as.numeric(pres2020$trump)
pres2020$total <- pres2020$biden + pres2020$blankenship + pres2020$jorgenson +
  pres2020$none + pres2020$trump
pres2020$dem <- 100*pres2020$biden / pres2020$total # democratic candidate support
pres2020$dem[is.nan(pres2020$dem)] <- NA

pres2020 <- left_join(pres2020, NVcounty, by="county")

precinctkey <- read.csv("question6_refined.csv") %>% 
  select(county, precinct, GEOID20)
pres2020 <- left_join(pres2020, precinctkey, by=c("county", "precinct"))

pres2020$GEOID20 <- as.character(pres2020$GEOID20)
pres2020_dem <- pres2020 %>% select(GEOID20, dem)

data.frame(duplicates=duplicated(pres2020_dem)) %>% View()
data.frame(duplicates=duplicated(data$GEOID20)) %>% View()

data <- left_join(data, pres2020_dem, by="GEOID20")
data <- unique(data) # resolve some duplicates

## 3) remoteness
for (i in 1:nrow(data)){
  data$disttoreno[i] <- distm(c(-119.81260, 39.52605), # Reno coordinates
                                 c(as.numeric(data$INTPTLON20[i]), 
                                   as.numeric(data$INTPTLAT20[i])),
                                 fun=distGeo)/1609
  data$disttovegas[i] <- distm(c(-115.14850, 36.16720), # Vegas coordinates
                               c(as.numeric(data$INTPTLON20[i]),
                                 as.numeric(data$INTPTLAT20[i])), 
                               fun=distGeo)/1609
  data$distmetro[i] <- min(data$disttoreno[i],data$disttovegas[i])
}


## 4) distance to the closest solar & geothermal power plants
data_sp <- as(data, "Spatial")
centr <- gCentroid(data_sp, byid=TRUE)
coord <- as.data.frame(centr@coords)
coord$id <- 1:nrow(coord)
data$id <- 1:nrow(data)
colnames(coord)[1:2] <- c("long","lat")

for (i in 1:nrow(data)){
  data$long[data$id==i] <- coord$long[coord$id==i]
  data$lat[data$id==i] <- coord$lat[coord$id==i]
}

gen_coord <- read_excel("coordinates.xlsx")[-1,]
gen_coord <- gen_coord[,c(1,2,3,4,5,6,9,11,19,30)]
colnames(gen_coord) <- c("utilitycode","utilityname","gencode","genname",
                         "lat","long","county","source","mw","year")

solar_gen_coord <- gen_coord %>% filter(source=="Solar Photovoltaic")
geo_gen_coord <- gen_coord %>% filter(source=="Geothermal")

data$solardist <- data$geodist <- NA

## distance to solar
solar_dist <- vector(mode="numeric", length=nrow(solar_gen_coord))
for (i in 1:nrow(data)){
  
  centr_long <- data$long[i]
  centr_lat <- data$lat[i]
  
  for (j in 1:nrow(solar_gen_coord)){
    
    solar_gen_long <- solar_gen_coord$long[j]
    solar_gen_lat <- solar_gen_coord$lat[j]
    
    solar_dist[j] <- distm( c(centr_long, centr_lat),
                            c(solar_gen_long, solar_gen_lat),
                            fun=distGeo) / 1609
  }
  
  data$solardist[i] <- min(solar_dist)
  
}

## distance to geothermal
geo_dist <- vector(mode="numeric", length=nrow(geo_gen_coord))
for (i in 1:nrow(data)){
  
  centr_long <- data$long[i]
  centr_lat <- data$lat[i]
  
  for (j in 1:nrow(geo_gen_coord)){
    
    geo_gen_long <- geo_gen_coord$long[j]
    geo_gen_lat <- geo_gen_coord$lat[j]
    
    geo_dist[j] <- distm( c(centr_long, centr_lat),
                          c(geo_gen_long, geo_gen_lat),
                          fun=distGeo) / 1609
  }
  
  data$geodist[i] <- min(geo_dist)
  
}

############################# 3. Pre-Analysis ##################################

## check non-linearity and skewed distribution of the variable
## if present, use logarithmic transformation

hist(data$pop, breaks=100)
data$pop_l <- log(data$pop + 1) 

hist(data$popden, breaks=100)
data$popden_l <- log(data$popden + 1)

hist(data$distmetro, breaks=100)
data$distmetro_l <- log(data$distmetro + 1)

hist(data$solardist, breaks=100)
data$solardist_l <- log(data$solardist + 1)

hist(data$geodist, breaks=100)
data$geodist_l <- log(data$geodist + 1)

## 0) create an index of relative rurality (Kim and Waldorf, 2018)

# population size
data$pop_re <- (max(data$pop_l, na.rm=T) - data$pop_l) / 
  (max(data$pop_l, na.rm=T) - min(data$pop_l, na.rm=T))
summary(data$pop_re)

# population density
data$popden_re <- (max(data$popden_l, na.rm=T) - data$popden_l) / 
  (max(data$popden_l, na.rm=T) - min(data$popden_l, na.rm=T))
summary(data$popden_re)

# distance from the metropolitan center
data$distmetro_re <- 1 - ((max(data$distmetro_l, na.rm=T) - data$distmetro_l) / 
                               (max(data$distmetro_l, na.rm=T) - min(data$distmetro_l, na.rm=T)))
summary(data$distmetro_re)

# use unweighted average
data$irr <- (data$pop_re + data$popden_re + data$distmetro_re) / 3
summary(data$irr); hist(data$irr, breaks=100)


############################## 4. Analysis ####################################

data <- data %>% filter(!is.na(support))
data <- data %>% filter(!is.na(irr))
data <- data %>% filter(VTDST20 != "ZZZZZZ") # empty entry

NVb <- poly2nb(data, queen=T)
NVw <- nb2listw(NVb, style="W", zero.policy=T)

data$irr <- data$irr * 100

## 1) standard linear regression
library(lmtest)
library(MASS)


result01 <- lm(support ~ irr, data)
round(coeftest(result01, vcov = vcovHC(result01, type="HC4")), 3)
summary(result01)

result02 <- lm(support ~ irr + dem, data)
round(coeftest(result02, vcov = vcovHC(result02, type="HC4")), 3)
summary(result02)

result03 <- lm(support ~ irr + solardist + geodist, data)
round(coeftest(result03, vcov = vcovHC(result03, type="HC4")), 3)
summary(result03)

result04 <- lm(support ~ irr + dem + solardist + geodist + 
                 whiteperc + hispanicperc + factor(COUNTYFP20), data)
round(coeftest(result04, vcov = vcovHC(result04, type="HC4")), 3)
summary(result04)

result05 <- lm(support ~  dem + solardist + geodist + 
                 whiteperc + hispanicperc + factor(COUNTYFP20), data)
round(coeftest(result05, vcov = vcovHC(result05, type="HC4")), 3)
summary(result05)

## 2) spatial lag models

result11 <- lagsarlm(support ~ irr + dem + solardist + geodist + 
                       whiteperc + hispanicperc, data, listw=NVw)
summary(result11)

round(result11$coefficients,3)

## 3) spatial error models

result21 <- errorsarlm(support ~ irr + dem + solardist + geodist + 
                       whiteperc + hispanicperc, data, listw=NVw)
summary(result21)
round(result21$coefficients,3)

########################## descriptive statistics #############################

ggplot(data) +
  geom_point(aes(x=irr, y=dem, color=support)) +
  xlab("Index of Relative Rurality") +
  ylab("Democratic Presidential Support") +
  labs(color="Ballot Support") +
  theme(text = element_text(size=15)) +
  scale_color_gradient2(low="red",high="blue") +
  theme_pubclean() 

cor.test(data$dem, data$irr, method="pearson")

ggplot(data) +
  geom_point(aes(x=irr, y=solardist, color=support)) +
  xlab("Index of Relative Rurality") +
  ylab("Distance to the Closest Solar Facility") +
  labs(color="Ballot Support") +
  theme(text = element_text(size=15)) +
  scale_color_gradient2(low="red",high="blue") +
  theme_pubclean() 

cor.test(data$solardist, data$irr, method="pearson")

ggplot(data) +
  geom_point(aes(x=irr, y=geodist, color=support)) +
  xlab("Index of Relative Rurality") +
  ylab("Distance to the Closest Geothermal Facility ") +
  labs(color="Ballot Support") +
  theme(text = element_text(size=15)) +
  scale_color_gradient2(low="red",high="blue") +
  theme_pubclean() 

cor.test(data$geodist, data$irr, method="pearson")

summary(data)


######################### counterfactual plots ################################
library(simcf)

fit <- lm(support ~ irr + dem + solardist + geodist + 
            whiteperc + hispanicperc, data)

summary(fit)
pe <- fit$coefficients
vc <- vcovHC(fit, type="HC4")

coefs <- mvrnorm(10000, pe, vc); dim(coefs)

ev <- matrix(NA, nrow=9, ncol=9)

for (i in seq(from=10, to=90, by=10)) { # dem
  
  for (k in seq(from=10, to=90, by=10)) { # irr
    
    intact <- i*k
    scen <- c(1, k, i, mean(data$solardist, na.rm=T), mean(data$geodist, na.rm=T), 
              mean(data$whiteperc, na.rm=T), mean(data$hispanicperc, na.rm=T))
    
    cf <- coefs %*% scen
    ev[k/10, i/10] <- round(mean(cf),2)
    
  }  
  
}


quantile(data$irr, probs=0.10, na.rm=T)
irr18 <- irr18_lwr <- irr18_upr <- irr18_mean <- NA
for (i in 1:100) {
  irr18 <- coefs %*% c(1, 18, i, mean(data$solardist, na.rm=T), mean(data$geodist, na.rm=T), 
                       mean(data$whiteperc, na.rm=T), mean(data$hispanicperc, na.rm=T))
  irr18_mean[i] <- mean(irr18)
  irr18_lwr[i] <- quantile(irr18, probs=0.025)
  irr18_upr[i] <- quantile(irr18, probs=0.975)
}


quantile(data$irr, probs=0.50, na.rm=T)
irr29 <- irr29_lwr <- irr29_upr <- irr29_mean <- NA
for (i in 1:100) {
  irr29 <- coefs %*% c(1, 29, i, mean(data$solardist, na.rm=T), mean(data$geodist, na.rm=T), 
                       mean(data$whiteperc, na.rm=T), mean(data$hispanicperc, na.rm=T))
  irr29_mean[i] <- mean(irr29)
  irr29_lwr[i] <- quantile(irr29, probs=0.025)
  irr29_upr[i] <- quantile(irr29, probs=0.975)
}

quantile(data$irr, probs=0.90, na.rm=T)
irr79 <- irr79_lwr <- irr79_upr <- irr79_mean <- NA
for (i in 1:100) {
  irr79 <- coefs %*% c(1, 79, i, mean(data$solardist, na.rm=T), mean(data$geodist, na.rm=T), 
                       mean(data$whiteperc, na.rm=T), mean(data$hispanicperc, na.rm=T))
  irr79_mean[i] <- mean(irr79)
  irr79_lwr[i] <- quantile(irr79, probs=0.025)
  irr79_upr[i] <- quantile(irr79, probs=0.975)
}

plotdata <- data.frame(
  irr=c(rep(18,100),rep(29,100),rep(79,100)), 
  dem=rep(c(1:100),3), 
  pe=c(irr18_mean, irr29_mean, irr79_mean),
  lwr=c(irr18_lwr, irr29_lwr, irr79_lwr), 
  upr=c(irr18_upr, irr29_upr, irr79_upr)
)

plotdata$irr <- as.factor(plotdata$irr)

source("theme_caviz.R")

labels <- c("79"="Strongly urban","29"="Median rural","18"="Strongly rural")
breaks=c("79","29","18")

ggplot(plotdata, aes(x=dem, y=pe, group=irr, color=irr, fill=irr, linetype=irr)) +
  geom_line(size=1) +
  geom_ribbon(aes(ymin=lwr, ymax=upr), alpha=.3, linetype=0) +
  theme_caviz_hgrid +
  scale_linetype_manual(breaks=breaks, labels=labels,
                        values=c("solid","dashed","dotdash")) +
  scale_color_manual(breaks=breaks, labels=labels,
                     values=c("#deca3c", "#66de3c", "#6c0699")) +
  scale_fill_manual(breaks=breaks, labels=labels,
                    values=c("#deca3c", "#66de3c", "#6c0699")) +
  theme(text=element_text(size=20)) +
  labs(color=NULL, fill=NULL, linetype=NULL)
  




